The subsequent investigation is related to the hypothesis of the Environmental Kuznets Curve (EKC) which states that emissions decline with increasing GDP as soon as a certain economic wealth is reached. Hence, it assumes a quadratic relationship between GDP and emission.
If this hypothesis holds, the established saying of “Green growth” might be feasible. However, the following analysis of World Bank data from 2006-2018 indicates that the EKC hypothesis has to be rejected.
The investigation is structured as follows. First, there is an animated graph followed by an interactive world map which represents a descriptive analysis to visualize the underlying data. It is continued by a regression analysis and a brief machine learning chapter two show possible examination methods.
Used Packages - gganimate, ggplot2, dplyr, ggthemes, tidyverse, countrycode, plotly
########### Loading & Checking Data #######
world_bank <- read.csv("world_bank.csv")
######## Data Preparation #####################
world_bank <- world_bank %>%
rename(pop=Population..total..SP.POP.TOTL.,
gdpPercap=GDP.per.capita..PPP..constant.2017.international.....NY.GDP.PCAP.PP.KD.,
CO2_PerCap=CO2.emissions..metric.tons.per.capita...EN.ATM.CO2E.PC.,
year=ï..Time)
####### Getting continent variable ######
world_bank$continent <- countrycode(sourcevar = world_bank[, "Country.Code"],
origin = "genc3c",
destination = "continent")
####### Adjusting the variables ########
world_bank$pop <- as.integer(world_bank$pop)
world_bank$pop_million <- world_bank$pop/1000000
world_bank$gdpPercap <- as.numeric(world_bank$gdpPercap)
world_bank$gdpPercap_1000 <- world_bank$gdpPercap/1000
world_bank$year <- as.integer(world_bank$year)
world_bank$CO2_PerCap <- as.numeric(world_bank$CO2_PerCap)
world_bank_complete <- na.omit(world_bank)
###### starting the plot ######
world_bank_graph <- world_bank_complete %>%
ggplot(aes(x=gdpPercap_1000, y=CO2_PerCap, color=continent, size=pop_million, label=Country.Name)) +
geom_point(alpha=0.7) +
geom_text(data = subset(world_bank_complete,
Country.Code=="USA"|Country.Code=="CHN"),
show.legend = FALSE)+
theme_fivethirtyeight() +
guides(color = guide_legend(override.aes = list(size = 4)))+
labs(title = expression("CO2 Emission with GDP per Capita by Country"),
x = "GDP per capita",
y="CO2 emission per capita",
caption = "Source: World Bank",
color="Continent",
size="Population in Mio")+
theme(axis.title = element_text(),
text = element_text(family = "Rubik"),
legend.text = element_text(family ="Rubik",size = 11),
legend.key.size = unit(.5, "cm")) +
scale_color_brewer(palette = "Set2")
world_bank_graph
world_bank_graph.animation <- world_bank_graph +
transition_time(year) +
labs(subtitle =" Year: {frame_time}")
world_bank_graph.animation
####### PLOT ########
#### Add hover variable (used later) ####
world_bank_complete <- world_bank_complete %>%
mutate(hover = paste(Country.Name, "\n",
lapply(world_bank_complete[,5], round, 2), "CO2 tons per capita", "\n",
lapply(world_bank_complete[,10], round,2), "GDP per capita in 1000$"))
world_co2_graph <- plot_geo(world_bank_complete,
frame =~year) %>%
add_trace(locations = ~Country.Code,
z=~CO2_PerCap,
zmin=0,
zmax=20,
color=~CO2_PerCap,
text=~hover,
hoverinfo=~"text",
colorscale="Inferno") %>%
layout(geo=list(projection = list( type = "orthographic")),
font = list(family="Rubik Black", size=11),
title="CO2 emission in metric tons per capita 2006-2018")%>%
colorbar(title="CO2 in t") %>%
config(displayModeBar=FALSE)
world_co2_graph
Used Packages - h2o, ranger, plm, sjPlot, PanJen
# Creation of continent dummys
world_bank_complete$continent <- as.factor(world_bank_complete$continent)
# Creating panel data
panel_worldbank_data <- pdata.frame(world_bank_complete,c("Country.Code","year"))
# Find the best specification for the CO2 model
form<-formula(log(CO2_PerCap) ~ pop_million + continent,
data = panel_worldbank_data)
fxlist= list(
linear = function(x) x,
sqr = function(x) x^2,
poly = function(x) x^3,
log = function(x) log(x),
inverse = function(x) 1/(x)
)
model<-choose.fform(data=panel_worldbank_data,variable="gdpPercap_1000",
base_form=form, functionList=fxlist)
## AIC BIC ranking (BIC)
## smoothing 3708.26 3800.24 1
## log 3890.91 3937.02 2
## inverse 5149.81 5195.91 3
## linear 6344.19 6390.29 4
## sqr 7038.23 7084.33 5
## poly 7248.77 7294.87 6
## base 7445.35 7485.69 7
## [1] "Smoothing is a semi-parametric and data-driven transformation, please see Wood (2006) for an elaboration"
### Random and Fixed effects estimation #####
random_effectes_reg <- plm(log(CO2_PerCap) ~ log(gdpPercap_1000) + pop_million, # + continent,
data = panel_worldbank_data, model="random", index = c("Country.Code","year"))
fixed_effectes_reg <- plm(log(CO2_PerCap) ~ log(gdpPercap_1000) + pop_million, # + continent,
data = panel_worldbank_data, model="within", index = c("Country.Code","year"))
##### Hausmanntest ###
phtest(fixed_effectes_reg,random_effectes_reg)
##
## Hausman Test
##
## data: log(CO2_PerCap) ~ log(gdpPercap_1000) + pop_million
## chisq = 108.1, df = 2, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# RE in inconsistent
#### Setting up correlated random effects model ####
#### Taking the mean of the time varying variables ####
# Average per Country of population in Million #
aggre_table<-aggregate(panel_worldbank_data$pop_million,
by=list(panel_worldbank_data$Country.Code), FUN=mean)
for(i in unique(panel_worldbank_data$Country.Code)){
panel_worldbank_data$pop_million_mean[panel_worldbank_data$Country.Code==i]<-aggre_table[aggre_table[,1]==i,2]}
# Average per Country of GDP per Capita #
aggre_table<-aggregate(panel_worldbank_data$gdpPercap_1000,
by=list(panel_worldbank_data$Country.Code), FUN=mean)
for(i in unique(panel_worldbank_data$Country.Code)){
panel_worldbank_data$gdpPercap_1000_mean[panel_worldbank_data$Country.Code==i]<-aggre_table[aggre_table[,1]==i,2]}
### Model estimation of Correlated Random effects ####
corr_random_effectes_reg <- plm(log(CO2_PerCap) ~ log(gdpPercap_1000) + pop_million + continent
+ gdpPercap_1000_mean,
data = panel_worldbank_data, model="random")
# Pop_milion_mean is taken out due to multicollinarity. gdpPercap_1000_mean is significant, hence
# CRE estimation is necessary as predicted by the Hausmann test
tab_model(corr_random_effectes_reg, file="output.html",
title="Correlated Random Effects", vcov.type = "HC3", robust=TRUE)
| log(CO2_PerCap) | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | -1.95 | -2.23 – -1.67 | <0.001 |
| gdpPercap_1000 [log] | 0.86 | 0.68 – 1.04 | <0.001 |
| pop_million | 0.00 | -0.00 – 0.00 | 0.130 |
| continent [Americas] | 0.49 | 0.19 – 0.78 | 0.001 |
| continent [Asia] | 0.67 | 0.32 – 1.03 | <0.001 |
| continent [Europe] | 0.52 | 0.16 – 0.87 | 0.004 |
| continent [Oceania] | 0.88 | 0.53 – 1.23 | <0.001 |
| gdpPercap_1000_mean | 0.01 | 0.00 – 0.02 | 0.046 |
| Observations | 2352 | ||
| R2 / R2 adjusted | 0.455 / 0.453 | ||
Used Packages - randomForest, randomUniformForest, plotmo, gbm
######################################################
###### Defining Functions for Machine Learning #######
###### data partitioning ###################
TVHsplit<-function(df, split = c(0.5, 0.5),
labels = c("T", "V"), iseed = 1176){
set.seed(iseed)
flags <- sample(labels, size = nrow(df),
prob = split, replace = TRUE)
return(flags)
}
#### Performance Test ######
ValidationRsquared<-function(validObs, validHat){
resids <- validHat - validObs
yBar <- mean(validObs)
offset <- validObs - yBar
num <- sum(resids^2)
denom <- sum(offset^2)
Rsq <- 1 - num/denom
return(Rsq)
}
# End of defining functions
### generate a training and a validation dataset ####
WorldBankFlag <- TVHsplit(world_bank_complete, split = c(0.7, 0.3),
labels = c("T", "V"))
WorldBankTrain <- world_bank_complete[which(WorldBankFlag == "T"), ]
WorldBankValid <- world_bank_complete[which(WorldBankFlag == "V"), ]
# Machine learning estimation - Random Forest
rfCO2_model<-randomForest(CO2_PerCap ~ gdpPercap_1000 + year + pop_million + continent + Country.Name,
data = WorldBankTrain, importance = TRUE)
# Performance Test
rfCO2_modelHatV <- predict(rfCO2_model, newdata = WorldBankValid)
ValidationRsquared(WorldBankValid$CO2_PerCap, rfCO2_modelHatV)
## [1] 0.8715406
# Visualization of Machine Learning Estimation
plot(rfCO2_model)
varImpPlot(rfCO2_model)
# compare predictions to actual results
prediction_results_forest <- data.frame(WorldBankValid$CO2_PerCap, rfCO2_modelHatV,
WorldBankValid$Country.Name, WorldBankValid$year)
colnames(prediction_results_forest) <- c("Actual CO2 pp", "Forest prediction CO2 pp",
"Country", "Year")
head(prediction_results_forest, n=10L)
## Actual CO2 pp Forest prediction CO2 pp Country Year
## 7 5.3194705 6.6282075 Antigua and Barbuda 2006
## 12 8.9615694 9.1234644 Austria 2006
## 16 0.2547524 1.7030228 Bangladesh 2006
## 17 5.2977746 6.9624355 Barbados 2006
## 32 0.0249742 0.6580490 Burundi 2006
## 35 0.3813720 0.7026253 Cameroon 2006
## 36 16.6119281 12.2180342 Canada 2006
## 38 0.0631363 0.8834994 Central African Republic 2006
## 41 3.4883351 4.2863329 Chile 2006
## 43 1.3360832 2.5163303 Colombia 2006
##########################################################
# Machine learning estimation - Gradient Boosting Machines
gbm_co2_Model <- gbm(CO2_PerCap ~ gdpPercap_1000 + year + pop_million + continent,
data = WorldBankTrain,
distribution = "gaussian",
interaction.depth = 40,
shrinkage = 0.05,
n.trees = 2000,
n.minobsinnode = 10)
# Performance Test
gbm_co2_ModelHatV <- predict(gbm_co2_Model, newdata = WorldBankValid,
n.trees = 1000)
ValidationRsquared(WorldBankValid$CO2_PerCap, gbm_co2_ModelHatV)
## [1] 0.9405641
# Visualization of Machine Learning Estimation
plotmo(gbm_co2_Model)
## plotmo grid: gdpPercap_1000 year pop_million continent
## 11.70814 2012 7.912398 Africa
# compare predictions to actual results
prediction_results_gbm <- data.frame(WorldBankValid$CO2_PerCap, gbm_co2_ModelHatV,
WorldBankValid$Country.Name, WorldBankValid$year)
colnames(prediction_results_gbm) <- c("Actual CO2 pp", "GBM prediction CO2 pp",
"Country", "Year")
head(prediction_results_gbm, n=10L)
## Actual CO2 pp GBM prediction CO2 pp Country Year
## 1 5.3194705 6.52236822 Antigua and Barbuda 2006
## 2 8.9615694 9.72331231 Austria 2006
## 3 0.2547524 0.19296383 Bangladesh 2006
## 4 5.2977746 9.58067140 Barbados 2006
## 5 0.0249742 0.06796703 Burundi 2006
## 6 0.3813720 0.23996058 Cameroon 2006
## 7 16.6119281 15.48849908 Canada 2006
## 8 0.0631363 0.45428269 Central African Republic 2006
## 9 3.4883351 5.55888444 Chile 2006
## 10 1.3360832 1.61794033 Colombia 2006